kanazawaR_03_test

作者

Yuko SEKIYA

公開

2025年5月10日

目的

観光庁のデータを読み込み、可視化する

内容

2023年 / 2024年の宿泊旅行統計調査(確定値 / 第2次速報値)から第1表(施設数)と第8表(稼働率)を都道府県別に時系列に比較

データの取り込み

  • 準備
rawdataPath2023 <- "./rawdata/宿泊旅行統計調査_2023_確定値_001750679.xlsx"
rawdataPath2024 <- "./rawdata/宿泊旅行統計調査_2024_第2次速報値_001867118.xlsx"
sheetlist2023 <- excel_sheets(rawdataPath2023)
sheetlist_第1表 <- sheetlist2023[stri_detect_regex(sheetlist2023, "^第1表")]
sheetlist_第8表 <- sheetlist2023[stri_detect_regex(sheetlist2023, "^第8表")]
colname_base <- c("小規模_総数", "小規模_観光", "小規模_非観光", "小中規模_総数", "小中規模_観光", "小中規模_非観光", "中規模_総数", "中規模_観光", "中規模_非観光", "大規模_総数", "大規模_観光", "大規模_非観光", "旅館", "リゾートホテル", "ビジネスホテル", "シティホテル", "簡易宿所", "会社・団体の宿泊所")

第1表

  • 読み取り用関数
# 第1表(施設数)
colname_HotelCount <- c("CD所在地", "施設数.全体_総数", "施設数.全体_観光", "施設数.全体_非観光", paste0("施設数.", colname_base))
func_HotelCountData <- function(X){
    rawdat <- read_excel(path = filepath,
                         sheet = X,
                         na = "-",
                         col_names = FALSE,
                         col_types = "text",
                         range = "R8C1:R54C22")
    setnames(rawdat, colname_HotelCount)
    rawdat <- rawdat %>%
        mutate(Year = stri_sub(filepath, 20, 23),
               Month = as.numeric(stri_replace_all_fixed(X, "第1表(", "") %>% stri_replace_all_fixed("月)", ""))) %>%
        select(one_of(c("Year", "Month", colname_HotelCount)))
    return(rawdat)
}
  • 読み取り
filepath <- rawdataPath2023
HotelCountData_2023 <- purrr::map_dfr(sheetlist_第1表, func_HotelCountData) %>%
    mutate(across(4:24, ~as.numeric(formatC(.x, format = "fg"))),
           CD所在地 = stri_sub_replace(CD所在地, stri_locate_first_regex(CD所在地, '[0-9]+'), omit_na=TRUE, replacement= "")) 
filepath <- rawdataPath2024
HotelCountData_2024 <- purrr::map_dfr(sheetlist_第1表, func_HotelCountData) %>%
    mutate(across(4:24, ~as.numeric(formatC(.x, format = "fg"))))

第8表

  • 読み取り用関数
colname_OperatingRatio <- c("CD所在地", "稼働率.全体_総数", "稼働率.全体_観光", "稼働率.全体_非観光", paste0("稼働率.", colname_base))
# 第8表(稼働率)
func_OperatingRatio <- function(X){
    rawdat <- read_excel(path = filepath,
                         sheet = X,
                         na = "-",
                         col_names = FALSE,
                         col_types = "text",
                         range = "R8C1:R54C22")
    setnames(rawdat, colname_OperatingRatio)
    rawdat <- rawdat %>%
        mutate(Year = stri_sub(filepath, 20, 23),
               Month = as.numeric(stri_replace_all_fixed(X, "第8表(", "") %>% stri_replace_all_fixed("月)", ""))) %>%
        select(one_of(c("Year", "Month", colname_OperatingRatio)))
    return(rawdat)
}
  • 読み取り
filepath <- rawdataPath2023
OperatingRatio_2023 <- purrr::map_dfr(sheetlist_第8表, func_OperatingRatio) %>%
    mutate(across(4:24, ~as.numeric(formatC(.x, format = "fg"))),
           CD所在地 = stri_sub_replace(CD所在地, stri_locate_first_regex(CD所在地, '[0-9]+'), omit_na=TRUE, replacement= ""))
filepath <- rawdataPath2024
OperatingRatio_2024 <- purrr::map_dfr(sheetlist_第8表, func_OperatingRatio) %>%
    mutate(across(4:24, ~as.numeric(formatC(.x, format = "fg"))))

データ加工

  • 全部つなげて縦持ちに変換する
HotelCountData <- bind_rows(HotelCountData_2023, HotelCountData_2024) %>%
    pivot_longer(cols = starts_with("施設数"),
                 names_to = "cat",
                 values_to = "val") %>%
    separate(col = "cat",
             into = c("category", "colnames"),
             sep = "\\.") %>%
    pivot_wider(names_from = "colnames",
                values_from = "val")

OperatingRatio <- bind_rows(OperatingRatio_2023, OperatingRatio_2024) %>%
    pivot_longer(cols = starts_with("稼働率"),
                 names_to = "cat",
                 values_to = "val") %>%
    separate(col = "cat",
             into = c("category", "colnames"),
             sep = "\\.") %>%
    pivot_wider(names_from = "colnames",
                values_from = "val") %>%
    dplyr::filter(!is.na(Month))

HotelTotalData <- bind_rows(HotelCountData, OperatingRatio)

HotelTotalData_long <- HotelTotalData %>%
  pivot_longer(cols = 5:25,
               names_to = "Class",
               values_to = "val")

  • 施設数
DT::datatable(HotelCountData, rownames = FALSE, 
              filter = 'top',
              extensions = 'Buttons', options = list(
                  dom = 'Bfrtip',
                  buttons = list(list(extend = 'colvis', columns = c(3:24)))
                  )
              )
  • 稼働率
DT::datatable(OperatingRatio, rownames = FALSE, 
              filter = 'top',
              extensions = 'Buttons', options = list(
                  dom = 'Bfrtip',
                  buttons = list(list(extend = 'colvis', columns = c(3:24)))
                  )
              )

可視化

  • 施設数の変動
HotelCountData <- HotelCountData %>%
    mutate(YMD = as.Date(paste(Year, Month, "01", sep = "-")))
HotelCount_plot <- HotelCountData %>%
    ggplot(aes(x = YMD, y = 全体_総数, colour = CD所在地)) +
    geom_line() +
    geom_point()

ggplotly(HotelCount_plot)
  • 稼働率の変動
OperatingRatio <- OperatingRatio %>%
    mutate(YMD = as.Date(paste(Year, Month, "01", sep = "-")))
OperatingRatio_plot <- OperatingRatio %>%
    ggplot(aes(x = YMD, y = 全体_総数, colour = CD所在地)) +
    geom_line() +
    geom_point()

ggplotly(OperatingRatio_plot)
  • 宿泊施設タイプ別変動
    • 石川県
pref <- "石川県"
prefClass_data <- HotelTotalData_long %>%
  mutate(YMD = as.Date(paste(Year, Month, "01", sep = "-"))) %>%
  filter(stri_detect_regex(.$CD所在地, pref)) %>%
  filter(!stri_detect_regex(.$Class, "_")) %>%
  pivot_wider(names_from = category, values_from = val) 
prefClass_HotelCount_plot <- prefClass_data %>%
  ggplot(aes(x = YMD, y = 施設数)) +
  geom_bar(aes(fill = Class, color = Class),
           stat = "identity")
ggplotly(prefClass_HotelCount_plot)
prefClass_OperatingRatio_plot <- prefClass_data %>%  
  ggplot(aes(x = YMD, y = 稼働率, fill = Class, color = Class)) +
  geom_line() +
  geom_point()
ggplotly(prefClass_OperatingRatio_plot)